## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.2.1 ✔ dplyr 1.1.2
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
##
## Attaching package: 'lubridate'
##
##
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
##
##
## corrplot 0.92 loaded
##
##
## Attaching package: 'ggpp'
##
##
## The following object is masked from 'package:ggplot2':
##
## annotate
##
##
##
## Attaching package: 'flextable'
##
##
## The following object is masked from 'package:purrr':
##
## compose
##
##
## Loading required package: Matrix
##
##
## Attaching package: 'Matrix'
##
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
##
##
## Attaching package: 'lmerTest'
##
##
## The following object is masked from 'package:lme4':
##
## lmer
##
##
## The following object is masked from 'package:stats':
##
## step
##
##
##
## Attaching package: 'maps'
##
##
## The following object is masked from 'package:purrr':
##
## map
##
##
## ℹ Google's Terms of Service: ]8;;https://mapsplatform.google.com<https://mapsplatform.google.com>]8;;
## ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.
##
## Attaching package: 'scales'
##
##
## The following object is masked from 'package:purrr':
##
## discard
##
##
## The following object is masked from 'package:readr':
##
## col_factor
##
##
## Loading required package: sp
##
## Checking rgeos availability: TRUE
## Please note that 'maptools' will be retired by the end of 2023,
## plan transition at your earliest convenience;
## some functionality will be moved to 'sp'.
##
##
## Attaching package: 'raster'
##
##
## The following object is masked from 'package:lme4':
##
## getData
##
##
## The following object is masked from 'package:dplyr':
##
## select
##
##
## rgeos version: 0.6-3, (SVN revision 696)
## GEOS runtime version: 3.10.2-CAPI-1.16.0
## Please note that rgeos will be retired during October 2023,
## plan transition to sf or terra functions using GEOS at your earliest convenience.
## See https://r-spatial.org/r/2023/05/15/evolution4.html for details.
## GEOS using OverlayNG
## Linking to sp version: 1.6-0
## Polygon checking: TRUE
##
##
##
## Attaching package: 'rgeos'
##
##
## The following object is masked from 'package:dplyr':
##
## symdiff
This script pulls in cleaned stream temperature data from TASR sites (data cleaning completed in the teton_data_cleaning.Rmd script) and SNOTEL data from the Phillips Bench and Grand Targhee SNOTEL stations, then runs and tests models of august stream temperature as a function of both time and weather-related variables. Input datasets are:
Primary Sections are:
I. Read in and clean data II. Data checking and predictor selection III. Model testing and selection IV. Model visualizations
Read in and clean data:
#Source data:
source <- read.csv("source_info.csv")%>%
rename(site = stream) #rename for merge
#Temperature data:
temp_data <- read.csv("Temperature/cleaned_full_datasets/temps_hourly.csv")%>%
mutate(date1 = ymd(date1),
year = year(date1))%>%
filter(!is.na(temp_c), !year == 2023)%>% #Drop missing data, 2023 data (no August data for 2023 yet).
merge(source, all = T)%>% #Add in source data
group_by(site, full_name, stream_code, date1, year, lat, long)%>%
summarise(t_xbar = mean(temp_c, na.rm = T),
t_max = max(temp_c, na.rm = T),
t_min = min(temp_c, na.rm = T),
t_range = t_max-t_min,
source = unique(source))%>% #calculate daily mean, min, and max temps for each site along with temp range
filter(month(date1)==8)%>% #extract data for August only
group_by(site)%>% #group by stream
mutate(n_yrs = length(unique(year)))%>%
ungroup()%>%
filter(!n_yrs<3)
## `summarise()` has grouped output by 'site', 'full_name', 'stream_code',
## 'date1', 'year', 'lat'. You can override using the `.groups` argument.
#SNOTEL data:
snotel <- read.csv("teton_snotel_23.csv")%>%
mutate(date1 = as.Date(date, format = "%m/%d/%y"), #recode date column as R date
airtemp_c = as.numeric(airtemp_c))%>% #recode airtemp as numeric
group_by(date1)%>% #group by date
summarise(swe_xbar = mean(swe_cm), #calculate mean SWE and airtemp across both stations
airtemp_xbar = mean(airtemp_c, na.rm = T))%>%
mutate(mo = month(date1), year = year(date1))%>% #Add month and year cols
group_by(year)%>% #group by year
mutate(swe_max = max(swe_xbar), #extract max SWE and add to new col
swe_max_date = date1[which.max(swe_xbar)], #extract max swe date and add to new column
swe_zero_date = date1[which.min(swe_xbar)][1])%>% #extract first day with swe = 0 for each year and add to new column
ungroup()%>% #ungroup to get all columns back
filter(mo == 8|mo ==7)%>% #extract July & August temperatures
group_by(year)%>% #group by year
summarise(airtemp_summer = mean(airtemp_xbar), #calculate mean july-aug air temp
swe_max = unique(swe_max),
swe_max_date = ymd(unique(swe_max_date)),
swe_zero_date =ymd(unique(swe_zero_date)))%>% #keep max SWE and, max swe date, and swe zero date
mutate(melt_days = as.numeric(swe_zero_date-swe_max_date), #calculate melt days (swe zero date minus max swe date)
swe_max_doy = yday(swe_max_date),
swe_zero_doy = yday(swe_zero_date)) #add columns for swe max and swe zero day of year - easier to use as predictor.
Merge stream and SNOTEL data:
temp_snotel <- merge(temp_data, snotel)
Pivot data for easier plotting:
temp_pivot <- temp_snotel%>%
pivot_longer(!c(year:long, source:swe_zero_doy), names_to = "temp_type", values_to = "stream_temp")%>% #Pivot to get column for temperature, row for each temp type (min, max, mean, range) for each day
mutate(transform = log10(stream_temp))%>%
mutate(yr_scale = scale(year), #create centered/scaled version of each predictor varaible
sweMax_scale = scale(swe_max),
sweMaxDoy_scale = scale(swe_max_doy),
sweZeroDoy_scale = scale(swe_zero_doy),
meltDays_scale = scale(melt_days),
summerAirtemp_scale = scale(airtemp_summer))
Quick visualizations - how are the data distributed across all groups and years?
ggplot(temp_pivot,aes(x = stream_temp, fill = factor(source)))+ # variable = stream temp
geom_histogram()+ #draw histogram
facet_wrap(~temp_type, scales = "free_x")+ #facet by temperature type
theme_classic() #get rid of extra formatting
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
which(temp_pivot$stream_temp<=0)
## [1] 187 380 408 444 1228 1419 1732 1739 1924 2040 2620 2748 3379 3467 3539
## [16] 3640 3683 3692 3720 3740 3752 4271 4275 4308 4419 4423 4436 4556 4572 4672
## [31] 4812 4888 5004 5008 5176 5255 5364 5384 5644 5820 5968 5984 6064 6108 6216
## [46] 6364 6420 6624 6644 6848 6888
temp_pivot[which(temp_pivot$stream_temp<=0),]
## # A tibble: 51 × 25
## year site full_name stream_code date1 lat long source n_yrs
## <dbl> <chr> <chr> <chr> <date> <dbl> <dbl> <chr> <int>
## 1 2015 s_cascade South Casca… SC 2015-08-23 43.7 -111. sub_i… 5
## 2 2016 n_teton North Teton NT 2016-08-04 43.8 -111. snowm… 8
## 3 2016 s_cascade South Casca… SC 2016-08-02 43.7 -111. sub_i… 5
## 4 2016 windcave Wind Cave WC 2016-08-05 43.7 -111. sub_i… 6
## 5 2017 s_ak_basin S. AK Basin AK 2017-08-11 43.7 -111. sub_i… 6
## 6 2018 s_teton South Teton ST 2018-08-27 43.7 -111. snowm… 6
## 7 2018 mid_teton Middle Teton MT 2018-08-01 43.7 -111. glaci… 7
## 8 2018 grizzly Grizzly GR 2018-08-27 43.8 -111. snowm… 5
## 9 2018 s_teton South Teton ST 2018-08-03 43.7 -111. snowm… 6
## 10 2018 delta Delta DE 2018-08-01 43.7 -111. glaci… 6
## # ℹ 41 more rows
## # ℹ 16 more variables: airtemp_summer <dbl>, swe_max <dbl>,
## # swe_max_date <date>, swe_zero_date <date>, melt_days <dbl>,
## # swe_max_doy <dbl>, swe_zero_doy <dbl>, temp_type <chr>, stream_temp <dbl>,
## # transform <dbl>, yr_scale <dbl[,1]>, sweMax_scale <dbl[,1]>,
## # sweMaxDoy_scale <dbl[,1]>, sweZeroDoy_scale <dbl[,1]>,
## # meltDays_scale <dbl[,1]>, summerAirtemp_scale <dbl[,1]>
Minimum and range are left-skewed (especially range); mean and max look bimodal, especially for snowmelt. Same patterns within years.
Also have 51 0’s in the temperature column (either temp range or minimum)
Testing Log transform:
ggplot(temp_pivot,aes(x = transform, fill = factor(source)))+ # variable = stream temp
geom_histogram()+ #draw histogram
facet_wrap(~temp_type, scales = "free_x")+ #facet by temperature type
theme_classic() #get rid of extra formatting
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 51 rows containing non-finite values (stat_bin).
Log transform doesn’t really help - just makes t_min and t_range right-skewed, still have weird stuff going on with max and mean.
Possible predictor variables are:
Checking relationships between predictors (centered/scaled) and each response variable; colors for each source type. For simplicity, convert data to get one column for predictor type, one column for predictor value:
Data prep:
pred_pivot <- temp_pivot%>%
dplyr::select(-c(airtemp_summer:swe_zero_doy), -year)%>% #drop swe max and zero date columns
pivot_longer(!c(site:n_yrs, temp_type, stream_temp, transform), names_to = "predictor", values_to = "pred_value") #pivot to get columns for predictor name and predictor value; rows for each predictor x temp type x stream x year combo
Plotting for each potential model:
Min temperature:
p1 <- ggplot(filter(pred_pivot, temp_type == "t_min"), aes(x = pred_value, y = stream_temp, color = source))+ #data = snowmelt only; x = predictor value, y = stream temp, color = stream temperature type
geom_point()+ #scatterplot
geom_smooth()+ #fit smooth fxn to data
facet_wrap(~predictor, scales = "free")+ #facet by predictor type; free scales.
labs(title = "t_min")
p1
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Relationships:
Mean temperature:
p2 <- ggplot(filter(pred_pivot, temp_type == "t_xbar"), aes(x = pred_value, y = stream_temp, color = source))+ #data = snowmelt only; x = predictor value, y = stream temp, color = stream temperature type
geom_point()+ #scatterplot
geom_smooth()+ #fit smooth fxn to data
facet_wrap(~predictor, scales = "free")+ #facet by predictor type; free scales.
labs(title = "t_xbar")
p2
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Relationships:
T_max:
p3 <- ggplot(filter(pred_pivot, temp_type == "t_max"), aes(x = pred_value, y = stream_temp, color = source))+ #data = snowmelt only; x = predictor value, y = stream temp, color = stream temperature type
geom_point()+ #scatterplot
geom_smooth()+ #fit smooth fxn to data
facet_wrap(~predictor, scales = "free")+ #facet by predictor type; free scales.
labs(title = "t_max")
p3
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Relationships:
T Range
p4 <- ggplot(filter(pred_pivot, temp_type == "t_range"), aes(x = pred_value, y = stream_temp, color = source))+ #data = snowmelt only; x = predictor value, y = stream temp, color = stream temperature type
geom_point()+ #scatterplot
geom_smooth()+ #fit smooth fxn to data
facet_wrap(~predictor, scales = "free")+ #facet by predictor type; free scales.
labs(title = "t_range")
p4
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Combine and save:
predictor.grid <- p1/p2/p3/p4
ggsave("Temperature/plots/model_figs/predictor_grid.jpg", predictor.grid, device = "jpeg", units = "in", dpi = "retina", height = 16, width = 8.5)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
If modeling year and swe max, linear assumption is probably ok. If including other varaibles, may need to use a non-linear method.
Extract predictor variables:
predictors <- temp_snotel%>%
dplyr::select(year, airtemp_summer, swe_max, melt_days, swe_max_doy, swe_zero_doy)%>% #extract predictor variables
distinct() #remove duplicate values
Calculate correlation matrix and melt for plotting:
cormat <-round(cor(predictors), 2) #create correlation matrix with rounded values
cormat[upper.tri(cormat)]<- NA #remove upper triangle values for plotting
cormat_melt<-reshape2::melt(cormat, na.rm = T)%>% #melt for plotting
arrange(desc(value))
cormat_melt #check output
## Var1 Var2 value
## 1 year year 1.00
## 2 airtemp_summer airtemp_summer 1.00
## 3 swe_max swe_max 1.00
## 4 melt_days melt_days 1.00
## 5 swe_max_doy swe_max_doy 1.00
## 6 swe_zero_doy swe_zero_doy 1.00
## 7 airtemp_summer year 0.75
## 8 swe_max_doy airtemp_summer 0.72
## 9 swe_zero_doy melt_days 0.59
## 10 swe_max_doy year 0.58
## 11 swe_zero_doy swe_max 0.55
## 12 swe_zero_doy airtemp_summer 0.42
## 13 swe_max_doy swe_max 0.26
## 14 swe_zero_doy year 0.22
## 15 swe_max airtemp_summer 0.21
## 16 melt_days swe_max 0.20
## 17 swe_zero_doy swe_max_doy 0.19
## 18 swe_max year -0.18
## 19 melt_days airtemp_summer -0.28
## 20 melt_days year -0.32
## 21 swe_max_doy melt_days -0.68
Plot correlation matrix:
ggplot(cormat_melt, aes(x = Var1, y = Var2, fill = value))+ #plotting: fill = correlation value, axes = varaibles
geom_tile(color = "white")+ #tile graph to get "pixels" for each predictor combo
scale_fill_gradient2(low = "blue", high = "red", mid = "white",
midpoint = 0, limit = c(-1,1), space = "Lab",
name="Pearson\nCorrelation") + #gradient fill based on correlation
theme_minimal()+ #removing xtra formatting
theme(axis.text.x = element_text(angle = 45, vjust = 1,
size = 10, hjust = 1),
axis.text.y = element_text(
size = 10),
axis.title = element_blank())+ #adjusting axis text formatting
coord_fixed()+ #fixed coordinate system
geom_text(aes(label = value)) #adding labels with correlation values to each tile.
Variables with Correlation > 0.7:
Variables with correlation between 0.6 & 0.7
Variables with correlation between 0.5 & 0.6
Takeaways:
Possible overall models:
Random effects structure:
Fixed effect: year
Overall model: temperature (min, mean, or max) ~ year + (1|source)+(1|site)
Nest data by temperature type:
temp_nest <- temp_pivot%>%
nest(data = -temp_type)
Fit models for each temperature type (NOT including year*source interaction):
yr.lmms <- temp_nest %>%
mutate(model = purrr::map(data, ~lmerTest::lmer(stream_temp~year+(1|source)+(1|site), data = .)%>% #run a model of temp~year for each site (referenced by "."); store results in new column called "model"
broom.mixed::tidy(effects="fixed", conf.int = TRUE)))%>% #tidy function to construct a summary table with model results (seperate tables for each site)
unnest(model)%>% #unnest model column to get rows for each site, columns for model metrics
mutate(p.value = round(p.value, 4))%>%
filter(!term == "(Intercept)")%>%
dplyr::select(temp_type, term, estimate, std.error, statistic, df, p.value)%>%
rename(predictor_name = term)
yr.lmms
## # A tibble: 4 × 7
## temp_type predictor_name estimate std.error statistic df p.value
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 t_xbar year 0.213 0.0189 11.3 1728. 0
## 2 t_max year 0.194 0.0322 6.02 1727. 0
## 3 t_min year 0.222 0.0145 15.4 1724. 0
## 4 t_range year -0.0296 0.0268 -1.10 1723. 0.270
Significant increase in mean, min, and max temperature over time across all sites at the P<0.001 level. Effect sizes:
No change in daily temperature change.
Model including year x source interaction:
Overall model: temp(mean, min, max, or range) = year + year*source + (1|source) + (1|ite)
yr.lmms <- temp_nest %>%
mutate(model = purrr::map(data, ~lmerTest::lmer(stream_temp~year+year*source+(1|source)+(1|site), data = .)%>% #run a model of temp~year for each site (referenced by "."); store results in new column called "model"
broom.mixed::tidy(effects="fixed", conf.int = TRUE)))%>% #tidy function to construct a summary table with model results (seperate tables for each site)
unnest(model)%>% #unnest model column to get rows for each site, columns for model metrics
mutate(p.value = round(p.value, 4))%>%
filter(!term == "(Intercept)")%>%
dplyr::select(temp_type, term, estimate, std.error, statistic, df, p.value)%>%
rename(predictor_name = term)
## Warning: There were 3 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `model = purrr::map(...)`.
## Caused by warning in `checkConv()`:
## ! unable to evaluate scaled gradient
## ℹ Run ]8;;ide:run:dplyr::last_dplyr_warnings()dplyr::last_dplyr_warnings()]8;; to see the 2 remaining warnings.
yr.lmms
## # A tibble: 20 × 7
## temp_type predictor_name estimate std.error statistic df p.value
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 t_xbar year 0.0776 0.0328 2.37 1.58e+3 0.0181
## 2 t_xbar sourcesnowmelt -971. 90.0 -10.8 5.15e-1 0.188
## 3 t_xbar sourcesub_ice 199. 89.9 2.21 5.13e-1 0.419
## 4 t_xbar year:sourcesnowmelt 0.483 0.0445 10.9 1.70e+3 0
## 5 t_xbar year:sourcesub_ice -0.0985 0.0445 -2.22 1.67e+3 0.0268
## 6 t_max year -0.00593 0.0574 -0.103 7.74e+2 0.918
## 7 t_max sourcesnowmelt -1265. 158. -8.02 2.32e-1 0.452
## 8 t_max sourcesub_ice 126. 158. 0.799 2.32e-1 0.756
## 9 t_max year:sourcesnowmelt 0.631 0.0780 8.09 1.25e+3 0
## 10 t_max year:sourcesub_ice -0.0623 0.0779 -0.800 1.36e+3 0.424
## 11 t_min year 0.122 0.0248 4.92 1.72e+3 0
## 12 t_min sourcesnowmelt -785. 67.9 -11.6 1.72e+3 0
## 13 t_min sourcesub_ice 211. 67.9 3.11 1.72e+3 0.0019
## 14 t_min year:sourcesnowmelt 0.390 0.0336 11.6 1.72e+3 0
## 15 t_min year:sourcesub_ice -0.105 0.0336 -3.11 1.72e+3 0.0019
## 16 t_range year -0.128 0.0491 -2.61 1.44e+3 0.0091
## 17 t_range sourcesnowmelt -479. 135. -3.56 1.41e+0 0.115
## 18 t_range sourcesub_ice -82.2 135. -0.611 1.40e+0 0.626
## 19 t_range year:sourcesnowmelt 0.240 0.0667 3.60 1.61e+3 0.0003
## 20 t_range year:sourcesub_ice 0.0407 0.0666 0.611 1.65e+3 0.541
Note for interpretation - reference condition = glacier-fed streams
t_xbar:
t_max:
t_min:
t_range:
For loop to create model objects for each temperature type and draw residual histograms, qq plots, and residual vs fitted plots for each temperature type. Overall model is:
Overall model: temp(mean, min, max, or range) = year + year*source + (1|source) + (1|ite)
temp.types <- unique(temp_pivot$temp_type) #vector of all temp types
models <- list() #empty list to recieve models
plots <- list() #empty list to recieve diagnostic plots
for(i in 1:length(temp.types)){
d <- temp_pivot%>%
filter(temp_type == temp.types[i]) #extract rows with first temp type from list
mod <- lmer(stream_temp~ #response variable
year+(year*source)+ #fixed effects (including year x source interaction)
(1|source)+(1|site), #random effects
data = d) #data source
models[[i]]<-mod #save model object in model list
df_mod <- broom.mixed::augment(mod) #augment model and convert to df for model diagnostic plots
df_mod[".stdresid"] <- resid(mod, type = "pearson") #calculate residuals for model
resid<-ggplot(df_mod, aes(x = .fitted, y = .resid))+ #plot fitted vs residuals
geom_point()+ #points
geom_hline(yintercept = 0)+ #horizontal line at y = 0
labs(title = paste("Residual vs Fitted", temp.types[i], sep = " - ")) #axis labels
qq<- ggplot(df_mod, aes(sample = .stdresid)) + #qq plot
geom_qq() +
geom_qq_line()+ #add points and lines
labs(title = paste("QQ Plot", temp.types[i], sep = " - ")) #axis labels
hist<- ggplot(df_mod, aes(x = .resid))+
geom_histogram()+ #draw histogram of residuals
labs(title = paste("Residual Histogram", temp.types[i], "= year+(year*source)+(1|source)+(1|site)", sep = " ")) #axis labels
plots[[i]]<-(resid|qq)/hist #store residual + qq plot in plots list
print(paste("finished", temp.types[i], sep = " "))
}
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues
## [1] "finished t_xbar"
## [1] "finished t_max"
## Warning: Model failed to converge with 1 negative eigenvalue: -5.1e-07
## [1] "finished t_min"
## [1] "finished t_range"
Individual model summaries:
models[[1]]
## Linear mixed model fit by REML ['lmerModLmerTest']
## Formula: stream_temp ~ year + (year * source) + (1 | source) + (1 | site)
## Data: d
## REML criterion at convergence: 6039.945
## Random effects:
## Groups Name Std.Dev.
## site (Intercept) 0.8216
## source (Intercept) 3.2072
## Residual 1.3586
## Number of obs: 1733, groups: site, 13; source, 3
## Fixed Effects:
## (Intercept) year sourcesnowmelt
## -153.78969 0.07756 -970.78453
## sourcesub_ice year:sourcesnowmelt year:sourcesub_ice
## 198.92137 0.48313 -0.09853
## optimizer (nloptwrap) convergence code: 0 (OK) ; 0 optimizer warnings; 2 lme4 warnings
models[[2]]
## Linear mixed model fit by REML ['lmerModLmerTest']
## Formula: stream_temp ~ year + (year * source) + (1 | source) + (1 | site)
## Data: d
## REML criterion at convergence: 7977.107
## Random effects:
## Groups Name Std.Dev.
## site (Intercept) 1.487
## source (Intercept) 5.893
## Residual 2.380
## Number of obs: 1733, groups: site, 13; source, 3
## Fixed Effects:
## (Intercept) year sourcesnowmelt
## 1.621e+01 -5.929e-03 -1.265e+03
## sourcesub_ice year:sourcesnowmelt year:sourcesub_ice
## 1.258e+02 6.305e-01 -6.234e-02
models[[3]]
## Linear mixed model fit by REML ['lmerModLmerTest']
## Formula: stream_temp ~ year + (year * source) + (1 | source) + (1 | site)
## Data: d
## REML criterion at convergence: 5077.39
## Random effects:
## Groups Name Std.Dev.
## site (Intercept) 0.9618
## source (Intercept) 1.7975
## Residual 1.0256
## Number of obs: 1733, groups: site, 13; source, 3
## Fixed Effects:
## (Intercept) year sourcesnowmelt
## -244.0024 0.1219 -785.0235
## sourcesub_ice year:sourcesnowmelt year:sourcesub_ice
## 211.0062 0.3900 -0.1045
models[[4]]
## Linear mixed model fit by REML ['lmerModLmerTest']
## Formula: stream_temp ~ year + (year * source) + (1 | source) + (1 | site)
## Data: d
## REML criterion at convergence: 7442.001
## Random effects:
## Groups Name Std.Dev.
## site (Intercept) 1.882
## source (Intercept) 3.716
## Residual 2.034
## Number of obs: 1733, groups: site, 13; source, 3
## Fixed Effects:
## (Intercept) year sourcesnowmelt
## 261.24228 -0.12829 -479.02364
## sourcesub_ice year:sourcesnowmelt year:sourcesub_ice
## -82.23986 0.24007 0.04073
Check diagnostic plots:
plots[[1]]
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
plots[[2]]
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
plots[[3]]
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
plots[[4]]
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
None of the models look particularly good - residuals vs fitted look ok, but qq plots do not follow lines. Residual histograms not normal.
Scatterplot of change in each temperature variable; x = year, y = temp, faceted by temp type, color = source
source.pal <- c("#C26ED6", "#F89225", "#76D96F")
source.names <- c("Glacier", "Snowmelt", "Icy Seep")
temp.yr <- ggplot(temp_pivot, aes(x = year, y = stream_temp, color = source))+
geom_point(alpha = 0.4)+
geom_smooth(method = "lm")+
facet_wrap(~temp_type, scales = "free", nrow = 4, ncol = 1)+
theme_classic()+
labs(x = "Year", y = "Temperature, C", color = "Source", title = "Model: temperature = year+(year*source)+(1|source)+(1|site)")+
scale_color_manual(values = source.pal, labels = source.names)+
theme(legend.position = "bottom",
strip.text = element_text(size = 10, face = "bold"),
axis.text = element_text(size = 10),
axis.title = element_text(size = 10, face = "bold"))
temp.yr
## `geom_smooth()` using formula 'y ~ x'
save:
ggsave("Temperature/plots/model_figs/temp_yr.jpg", temp.yr, device = "jpeg", units = "in", dpi = "retina", height = 11, width = 8.5)
## `geom_smooth()` using formula 'y ~ x'
Data prep and model fitting:
source_nest <- temp_pivot%>%
nest(data = -c(source, temp_type)) #nest data by source and temperature type
source.yr.lmms <- source_nest %>%
mutate(model = purrr::map(data, ~lmerTest::lmer(stream_temp~year+(1|site), data = .)%>% #run a model of temp~year for each site (referenced by "."); store results in new column called "model"
broom.mixed::tidy(effects="fixed", conf.int = TRUE)))%>% #tidy function to construct a summary table with model results (seperate tables for each site)
unnest(model)%>% #unnest model column to get rows for each site, columns for model metrics
mutate(p.value = round(p.value, 4))%>%
filter(!term == "(Intercept)")%>%
dplyr::select(source, temp_type, term, estimate, std.error, statistic, df, p.value)%>% #drop extra columns
rename(predictor_name = term) #rename for easier reading
source.yr.lmms
## # A tibble: 12 × 8
## source temp_type predictor_name estimate std.error statistic df p.value
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 snowmelt t_xbar year 0.562 0.0438 12.8 731. 0
## 2 snowmelt t_max year 0.628 0.0745 8.43 730. 0
## 3 snowmelt t_min year 0.512 0.0332 15.4 728. 0
## 4 snowmelt t_range year 0.112 0.0620 1.81 728. 0.0706
## 5 sub_ice t_xbar year -0.0215 0.0151 -1.43 437. 0.155
## 6 sub_ice t_max year -0.0696 0.0357 -1.95 437. 0.0521
## 7 sub_ice t_min year 0.0176 0.00837 2.11 435. 0.0358
## 8 sub_ice t_range year -0.0886 0.0345 -2.57 436. 0.0106
## 9 glacier t_xbar year 0.0760 0.0109 7.00 555. 0
## 10 glacier t_max year -0.00677 0.0193 -0.350 555. 0.727
## 11 glacier t_min year 0.121 0.0107 11.3 555. 0
## 12 glacier t_range year -0.128 0.0191 -6.70 555. 0
Results:
Bottom Line: All streams have experienced some warming, but exist on a continuum - snowmelt streams have warmed uniformly (higher min, mean, and max), but daily temperature fluctuations remain about the same. Glacier fed streams have higher minimum and mean temperatures, but maximum temperatures have not changed. Subterranean ice fed streams have seen the least change, with the only significant increase being in daily minimum temperature. Both glacier fed and subterranean ice fed streams have seen a reduction in the magnitude of daily temperature fluctuations due to increasing minimum temperatures and stable (or maybe even decreasing for sub ice) maximum temperatures.
Random effects structure:
Fixed effects:
Possibilities are: airtemp_summer, swe_zero_doy, swe_max, melt_days, and swe_max_doy. airtemp_summer and swe_max_doy are strongly correlated (>0.7), so one needs to be dropped - swe_max_doy is also strongly correlated with melt_days, so dropping swe_max_doy allows more other variables to remain in the model. In addition, the timing of maximum snowpack is probably less relevant than when that snowpack melted out, which is captured by swe_zero_doy and melt_days. This leaves a fixed effects structure of:
airtemp_summer+swe_zero_doy+swe_max+melt_days (Using all centered/scaled variables)
Overall model (initial):
stream_temp ~ summerAirtemp_scale+sweZeroDoy_scale+sweMax_scale+meltDays_scale+(1|source)+(1|site)+(1|year)
Fit models and return summaries for each temperature type:
snow.lmms <- temp_nest %>%
mutate(model = purrr::map(data, ~lmerTest::lmer(stream_temp~summerAirtemp_scale+sweZeroDoy_scale+sweMax_scale+meltDays_scale+(1|source)+(1|site)+(1|year), data = .)%>% #run a model of temp~snotel vars for each site (referenced by "."); store results in new column called "model"
broom.mixed::tidy(effects="fixed", conf.int = TRUE)))%>% #tidy function to construct a summary table with model results (seperate tables for each site)
unnest(model)%>% #unnest model column to get rows for each site, columns for model metrics
mutate(p.value = round(p.value, 4))%>%
filter(!term == "(Intercept)")%>%
dplyr::select(temp_type, term, estimate, std.error, statistic, df, p.value)%>%
rename(predictor_name = term)%>%
arrange(p.value)
snow.lmms
## # A tibble: 16 × 7
## temp_type predictor_name estimate std.error statistic df p.value
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 t_min sweMax_scale -0.527 0.137 -3.84 2.93 0.0325
## 2 t_xbar sweMax_scale -0.593 0.175 -3.40 3.11 0.0402
## 3 t_max sweMax_scale -0.682 0.286 -2.39 2.80 0.103
## 4 t_min summerAirtemp_scale 0.144 0.163 0.888 3.19 0.436
## 5 t_range sweMax_scale -0.155 0.257 -0.603 2.40 0.598
## 6 t_range summerAirtemp_scale -0.162 0.305 -0.531 2.64 0.637
## 7 t_range meltDays_scale 0.189 0.467 0.405 2.62 0.716
## 8 t_min meltDays_scale -0.0970 0.249 -0.389 3.18 0.722
## 9 t_xbar summerAirtemp_scale 0.0634 0.207 0.306 3.41 0.777
## 10 t_xbar meltDays_scale -0.0630 0.318 -0.198 3.39 0.854
## 11 t_max meltDays_scale 0.0964 0.522 0.185 3.07 0.865
## 12 t_range sweZeroDoy_scale 0.0640 0.510 0.125 2.57 0.909
## 13 t_max sweZeroDoy_scale 0.0466 0.570 0.0818 3.01 0.940
## 14 t_min sweZeroDoy_scale -0.0128 0.273 -0.0469 3.12 0.965
## 15 t_max summerAirtemp_scale -0.0145 0.341 -0.0424 3.10 0.969
## 16 t_xbar sweZeroDoy_scale 0.0143 0.347 0.0412 3.32 0.970
In overall model, only significant predictor is swe_max - higher maximum swe is associated with lower minimum and average temperatures across all stream types.
For loop to create model objects and diagnostic plots for each temp model:
st.models <- list() #empty list to recieve models
st.plots <- list() #empty list to recieve diagnostic plots
for(i in 1:length(temp.types)){
d <- temp_pivot%>%
filter(temp_type == temp.types[i]) #extract rows with first temp type from list
mod <- lmer(stream_temp~ #response variable
summerAirtemp_scale+sweZeroDoy_scale+sweMax_scale+meltDays_scale+ #fixed effects
(1|source)+(1|site), #random effects
data = d) #data source
st.models[[i]]<-mod #save model object in model list
df_mod <- broom.mixed::augment(mod) #augment model and convert to df for model diagnostic plots
df_mod[".stdresid"] <- resid(mod, type = "pearson") #calculate residuals for model
resid<-ggplot(df_mod, aes(x = .fitted, y = .resid))+ #plot fitted vs residuals
geom_point()+ #points
geom_hline(yintercept = 0)+ #horizontal line at y = 0
labs(title = paste("Residual vs Fitted", temp.types[i], sep = " - ")) #axis labels
qq<- ggplot(df_mod, aes(sample = .stdresid)) + #qq plot
geom_qq() +
geom_qq_line()+ #add points and lines
labs(title = paste("QQ Plot", temp.types[i], sep = " - ")) #axis labels
hist<- ggplot(df_mod, aes(x = .resid))+
geom_histogram()+ #draw histogram of residuals
labs(title = paste(temp.types[i], "= sweMax+sweZeroDoy+meltDays+summerAirtemp+(1|source)+(1|site)+(1|year)", sep = " "))+ #axis labels
theme(title = element_text(size = 10))
st.plots[[i]]<-(resid|qq)/hist #store residual + qq plot in plots list
print(paste("finished", temp.types[i], sep = " "))
}
## [1] "finished t_xbar"
## [1] "finished t_max"
## [1] "finished t_min"
## [1] "finished t_range"
Check diagnostic plots:
st.plots[[1]]
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
st.plots[[2]]
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
st.plots[[3]]
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
st.plots[[4]]
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Same problems as previous models - qq plots look skewed, non-normal distribution.
Data Prep:
pred_data <- temp_pivot%>%
pivot_longer(!c(year:transform, yr_scale), names_to = "pred_name", values_to = "pred_value")
T Min:
ggplot(filter(pred_data, temp_type == "t_min"),aes(x = pred_value, y = stream_temp, color = source))+
geom_point(alpha = 0.4)+
geom_smooth(method = "lm")+
facet_wrap(~pred_name, scales = "free")+
theme_classic()+
labs(x = "Year", y = "Aug. 1-30 Minimum daily temperature, C", color = "Source")+
scale_color_manual(values = source.pal, labels = source.names)+
theme(legend.position = "bottom",
strip.text = element_text(size = 10, face = "bold"),
axis.text = element_text(size = 10),
axis.title = element_text(size = 10, face = "bold"))
## `geom_smooth()` using formula 'y ~ x'
T Xbar:
ggplot(filter(pred_data, temp_type == "t_xbar"),aes(x = pred_value, y = stream_temp, color = source))+
geom_point(alpha = 0.4)+
geom_smooth(method = "lm")+
facet_wrap(~pred_name, scales = "free")+
theme_classic()+
labs(x = "Year", y = "Aug. 1-30 mean daily temperature, C", color = "Source")+
scale_color_manual(values = source.pal, labels = source.names)+
theme(legend.position = "bottom",
strip.text = element_text(size = 10, face = "bold"),
axis.text = element_text(size = 10),
axis.title = element_text(size = 10, face = "bold"))
## `geom_smooth()` using formula 'y ~ x'
T Max:
ggplot(filter(pred_data, temp_type == "t_max"),aes(x = pred_value, y = stream_temp, color = source))+
geom_point(alpha = 0.4)+
geom_smooth(method = "lm")+
facet_wrap(~pred_name, scales = "free")+
theme_classic()+
labs(x = "Year", y = "Aug. 1-30 maximum daily temperature, C", color = "Source")+
scale_color_manual(values = source.pal, labels = source.names)+
theme(legend.position = "bottom",
strip.text = element_text(size = 10, face = "bold"),
axis.text = element_text(size = 10),
axis.title = element_text(size = 10, face = "bold"))
## `geom_smooth()` using formula 'y ~ x'
T range:
ggplot(filter(pred_data, temp_type == "t_range"),aes(x = pred_value, y = stream_temp, color = source))+
geom_point(alpha = 0.4)+
geom_smooth(method = "lm")+
facet_wrap(~pred_name, scales = "free")+
theme_classic()+
labs(x = "Year", y = "Aug. 1-30 daily temperature change, C", color = "Source")+
scale_color_manual(values = source.pal, labels = source.names)+
theme(legend.position = "bottom",
strip.text = element_text(size = 10, face = "bold"),
axis.text = element_text(size = 10),
axis.title = element_text(size = 10, face = "bold"))
## `geom_smooth()` using formula 'y ~ x'